-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[mlir][memref] Remove redundant memref.tensor_store
op
#71010
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[mlir][memref] Remove redundant memref.tensor_store
op
#71010
Conversation
@llvm/pr-subscribers-mlir-linalg @llvm/pr-subscribers-mlir Author: Matthias Springer (matthias-springer) Changes
Patch is 23.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71010.diff 15 Files Affected:
diff --git a/mlir/docs/LangRef.md b/mlir/docs/LangRef.md
index 0cfe845638c3cfb..52b0d8317eef7f2 100644
--- a/mlir/docs/LangRef.md
+++ b/mlir/docs/LangRef.md
@@ -77,10 +77,12 @@ func.func @mul(%A: tensor<100x?xf32>, %B: tensor<?x50xf32>) -> (tensor<100x50xf3
// Allocate addressable "buffers" and copy tensors %A and %B into them.
%A_m = memref.alloc(%n) : memref<100x?xf32>
- memref.tensor_store %A to %A_m : memref<100x?xf32>
+ bufferization.materialize_in_destination %A in writable %A_m
+ : (tensor<100x?xf32>, memref<100x?xf32>) -> ()
%B_m = memref.alloc(%n) : memref<?x50xf32>
- memref.tensor_store %B to %B_m : memref<?x50xf32>
+ bufferization.materialize_in_destination %B in writable %B_m
+ : (tensor<?x50xf32>, memref<?x50xf32>) -> ()
// Call function @multiply passing memrefs as arguments,
// and getting returned the result of the multiplication.
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 9e3f79e64bb1d79..17e367e3d59281b 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -103,7 +103,7 @@ def BufferizeToAllocationOp : Op<Transform_Dialect,
```
%alloc = memref.alloc() : memref<10xf32>
- memref.tensor_store %dest, %alloc : memref<10xf32>
+ bufferization.materialize_in_destination %dest in %alloc
memref.store %f, %alloc[%pos] : memref<10xf32>
%0 = bufferization.to_tensor %alloc restrict writable : memref<10xf32>
```
@@ -118,15 +118,16 @@ def BufferizeToAllocationOp : Op<Transform_Dialect,
An optional memory space attribute can be specified for the materialized
buffer allocation.
- If a memory copy is needed, a "memref.tensor_store" is used when possible.
- This is an op with tensor semantics that will bufferize to a memory copy
- later. Which concrete op will be used for the memory copy is up to the
- bufferization framework. Alternatively, a custom memcpy op can be specified
- via `memcpy_op`. Currently supported are "memref.copy" and "linalg.copy".
- In that case, the source of each memcpy must not have a custom memory space.
- Furthermore, because the future buffer layout unknown for a given tensor,
- a fully dynamic layout is assumed for best compatibility. Users should use
- "memref.tensor_store" when possible.
+ If a memory copy is needed, a "bufferization.materialize_in_destination" is
+ used when possible. This is an op with tensor semantics that will bufferize
+ to a memory copy later. Which concrete op will be used for the memory copy
+ is up to the bufferization framework. Alternatively, a custom memcpy op can
+ be specified via `memcpy_op`. Currently supported are "memref.copy" and
+ "linalg.copy". In that case, the source of each memcpy must not have a
+ custom memory space. Furthermore, because the future buffer layout is
+ unknown for a given tensor, a fully dynamic layout is assumed for best
+ compatibility. Users should use "bufferization.materialize_in_destination"
+ when possible.
"memref.alloc" is used for new buffer allocations. The buffer is deallocated
at the end of the block if the "emit_dealloc" attribute is present. If this
@@ -148,7 +149,8 @@ def BufferizeToAllocationOp : Op<Transform_Dialect,
let arguments = (ins TransformHandleTypeInterface:$target,
OptionalAttr<AnyAttr>:$memory_space,
- DefaultValuedAttr<StrAttr, "\"memref.tensor_store\"">:
+ DefaultValuedAttr<StrAttr,
+ "\"bufferization.materialize_in_destination\"">:
$memcpy_op,
DefaultValuedAttr<StrAttr, "\"memref.alloc\"">:
$alloc_op,
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index 122f73562852101..069307022b75bc5 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -50,8 +50,12 @@ struct BufferizeToAllocationOptions {
enum class AllocOp { MemrefAlloc = 0, MemrefAlloca = 1 };
AllocOp allocOp = AllocOp::MemrefAlloc;
- enum class MemcpyOp { MemrefTensorStore = 0, MemrefCopy = 1, LinalgCopy = 2 };
- MemcpyOp memcpyOp = MemcpyOp::MemrefTensorStore;
+ enum class MemcpyOp {
+ MaterializeInDestination = 0,
+ MemrefCopy = 1,
+ LinalgCopy = 2
+ };
+ MemcpyOp memcpyOp = MemcpyOp::MaterializeInDestination;
/// If set to "true", only the destination tensor operands are bufferized to
/// a new allocation (and wrapped in "bufferization.to_tensor"), but not the
@@ -66,7 +70,8 @@ struct BufferizeToAllocationOptions {
};
/// Materialize a buffer allocation for the given tensor.pad op and lower the
-/// op to linalg.fill/linalg.generic + memref.tensor_store. E.g.:
+/// op to linalg.fill/linalg.generic + bufferization.materialize_in_destination.
+/// E.g.:
///
/// %0 = tensor.pad low[%l] high[%h] %t ...
///
@@ -75,7 +80,7 @@ struct BufferizeToAllocationOptions {
/// %alloc = memref.alloc
/// linalg.fill ... outs(%alloc)
/// %subview = memref.subview %alloc [%l] [...] [1]
-/// memref.tensor_store %t, %subview
+/// bufferization.materialize_in_destination %t in %subview
/// %0 = bufferization.to_tensor %alloc restrict writable
///
/// In addition to rewriting the IR as shown above, this function returns the
@@ -96,7 +101,7 @@ Value bufferizeToAllocation(RewriterBase &rewriter,
/// is lowered to:
///
/// %alloc = memref.alloc
-/// memref.tensor_store %t, %subview
+/// bufferization.materialize_in_destination %t in %subview
/// vector.mask {
/// vector.transfer_write %arg0, %alloc : vector<16xf32>, memref<?xf32>
/// } : vector<16xi1>
diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
index 8fa41f4e4b659f5..87bcc1fe1134f5d 100644
--- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
+++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
@@ -2095,37 +2095,6 @@ def SubViewOp : MemRef_OpWithOffsetSizesAndStrides<"subview", [
let hasVerifier = 1;
}
-//===----------------------------------------------------------------------===//
-// TensorStoreOp
-//===----------------------------------------------------------------------===//
-
-def TensorStoreOp : MemRef_Op<"tensor_store",
- [SameOperandsShape, SameOperandsElementType,
- TypesMatchWith<"type of 'value' matches tensor equivalent of 'memref'",
- "memref", "tensor",
- "getTensorTypeFromMemRefType($_self)">]> {
- let summary = "tensor store operation";
- let description = [{
- Stores the contents of a tensor into a memref. The first operand is a value
- of tensor type, the second operand is a value of memref type. The shapes and
- element types of these must match, and are specified by the memref type.
-
- Example:
-
- ```mlir
- %9 = dim %8, 1 : tensor<4x?xf32>
- %10 = memref.alloc(%9) : memref<4x?xf32, #layout, memspace0>
- memref.tensor_store %8, %10 : memref<4x?xf32, #layout, memspace0>
- ```
- }];
-
- let arguments = (ins AnyTensor:$tensor, Arg<AnyRankedOrUnrankedMemRef,
- "the reference to store to",
- [MemWriteAt<0, FullEffect>]>:$memref);
-
- let assemblyFormat = "$tensor `,` $memref attr-dict `:` type($memref)";
-}
-
//===----------------------------------------------------------------------===//
// TransposeOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/MemRef/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/mlir/Dialect/MemRef/Transforms/BufferizableOpInterfaceImpl.h
deleted file mode 100644
index 7e532100e4eeb74..000000000000000
--- a/mlir/include/mlir/Dialect/MemRef/Transforms/BufferizableOpInterfaceImpl.h
+++ /dev/null
@@ -1,21 +0,0 @@
-//===- BufferizableOpInterfaceImpl.h - Impl. of BufferizableOpInterface ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_DIALECT_MEMREF_BUFFERIZABLEOPINTERFACEIMPL_H
-#define MLIR_DIALECT_MEMREF_BUFFERIZABLEOPINTERFACEIMPL_H
-
-namespace mlir {
-
-class DialectRegistry;
-
-namespace memref {
-void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry);
-} // namespace memref
-} // namespace mlir
-
-#endif // MLIR_DIALECT_MEMREF_BUFFERIZABLEOPINTERFACEIMPL_H
diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
index 621110d130818d3..6c9bc613d30cd42 100644
--- a/mlir/include/mlir/InitAllDialects.h
+++ b/mlir/include/mlir/InitAllDialects.h
@@ -53,7 +53,6 @@
#include "mlir/Dialect/MemRef/IR/MemRefMemorySlot.h"
#include "mlir/Dialect/MemRef/IR/ValueBoundsOpInterfaceImpl.h"
#include "mlir/Dialect/MemRef/Transforms/AllocationOpInterfaceImpl.h"
-#include "mlir/Dialect/MemRef/Transforms/BufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/MemRef/Transforms/RuntimeOpVerification.h"
#include "mlir/Dialect/Mesh/IR/MeshOps.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
@@ -156,7 +155,6 @@ inline void registerAllDialects(DialectRegistry &registry) {
linalg::registerTilingInterfaceExternalModels(registry);
linalg::registerValueBoundsOpInterfaceExternalModels(registry);
memref::registerAllocationOpInterfaceExternalModels(registry);
- memref::registerBufferizableOpInterfaceExternalModels(registry);
memref::registerRuntimeVerifiableOpInterfaceExternalModels(registry);
memref::registerValueBoundsOpInterfaceExternalModels(registry);
memref::registerMemorySlotExternalModels(registry);
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
index 033eeac1939fc49..ec5feab1ed0d856 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
@@ -585,7 +585,11 @@ MaterializeInDestinationOp::bufferize(RewriterBase &rewriter,
assert(isa<BaseMemRefType>(getDest().getType()) && "expected memref type");
buffer = getDest();
}
- rewriter.create<memref::TensorStoreOp>(getLoc(), getSource(), buffer);
+ auto srcBuffer = getBuffer(rewriter, getSource(), options);
+ if (failed(srcBuffer))
+ return failure();
+ if (failed(options.createMemCpy(rewriter, getLoc(), *srcBuffer, buffer)))
+ return failure();
replaceOpWithBufferizedValues(rewriter, getOperation(),
tensorDest ? ValueRange(buffer) : ValueRange());
return success();
@@ -682,8 +686,9 @@ LogicalResult MaterializeInDestinationOp::verify() {
void MaterializeInDestinationOp::build(OpBuilder &builder,
OperationState &state, Value source,
Value dest) {
- assert(isa<TensorType>(dest.getType()) && "expected tensor type");
- build(builder, state, /*result=*/dest.getType(), source, dest);
+ auto destTensorType = dyn_cast<TensorType>(dest.getType());
+ build(builder, state, /*result=*/destTensorType ? destTensorType : Type(),
+ source, dest);
}
bool MaterializeInDestinationOp::isWritable(Value value,
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 87be3bb85b6e788..e6ab2bcf5fd37ae 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -241,9 +241,9 @@ DiagnosedSilenceableFailure transform::BufferizeToAllocationOp::apply(
rewriter.setListener(&newOpsListener);
linalg::BufferizeToAllocationOptions options;
- if (getMemcpyOp() == "memref.tensor_store") {
- options.memcpyOp =
- linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefTensorStore;
+ if (getMemcpyOp() == "bufferization.materialize_in_destination") {
+ options.memcpyOp = linalg::BufferizeToAllocationOptions::MemcpyOp::
+ MaterializeInDestination;
} else if (getMemcpyOp() == "memref.copy") {
options.memcpyOp =
linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefCopy;
@@ -296,7 +296,7 @@ void transform::BufferizeToAllocationOp::getEffects(
}
LogicalResult transform::BufferizeToAllocationOp::verify() {
- if (getMemcpyOp() != "memref.tensor_store" &&
+ if (getMemcpyOp() != "bufferization.materialize_in_destination" &&
getMemcpyOp() != "memref.copy" && getMemcpyOp() != "linalg.copy")
return emitOpError() << "unsupported memcpy op";
if (getAllocOp() != "memref.alloc" && getAllocOp() != "memref.alloca")
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
index f7340844f7e1977..4c7237a88419e77 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
@@ -63,11 +63,14 @@ static void createMemcpy(OpBuilder &b, Location loc, Value tensorSource,
assert(memrefDest.getType().isa<MemRefType>() && "expected ranked memref");
switch (options.memcpyOp) {
- case linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefTensorStore:
+ case linalg::BufferizeToAllocationOptions::MemcpyOp::
+ MaterializeInDestination: {
// Note: This is the preferred way of memcpy'ing because no layout map
// and/or memory space must be specified for the source.
- b.create<memref::TensorStoreOp>(loc, tensorSource, memrefDest);
- break;
+ auto materializeOp = b.create<bufferization::MaterializeInDestinationOp>(
+ loc, tensorSource, memrefDest);
+ materializeOp.setWritable(true);
+ } break;
case linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefCopy: {
// TODO: Support custom memory space on source.
// We do not know the layout map of the source yet, so use a fully dynamic
@@ -238,7 +241,7 @@ Value linalg::bufferizeToAllocation(
rewriter.setInsertionPointAfter(fillOp);
}
- // Create memref.tensor_store.
+ // Create memcpy.
SmallVector<OpFoldResult> sizes =
getMixedSizes(rewriter, loc, padOp.getSource());
SmallVector<OpFoldResult> strides(padOp.getResultType().getRank(),
diff --git a/mlir/lib/Dialect/MemRef/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/MemRef/Transforms/BufferizableOpInterfaceImpl.cpp
deleted file mode 100644
index 9687a83a6172fe2..000000000000000
--- a/mlir/lib/Dialect/MemRef/Transforms/BufferizableOpInterfaceImpl.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-//===- BufferizableOpInterfaceImpl.cpp - Impl. of BufferizableOpInterface -===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/MemRef/Transforms/BufferizableOpInterfaceImpl.h"
-
-#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/IR/Dialect.h"
-#include "mlir/IR/Operation.h"
-
-using namespace mlir;
-using namespace mlir::bufferization;
-
-namespace {
-/// Bufferization of memref.tensor_store. Replace with memref.copy.
-struct TensorStoreOpInterface
- : public BufferizableOpInterface::ExternalModel<TensorStoreOpInterface,
- memref::TensorStoreOp> {
- AliasingValueList getAliasingValues(Operation *op, OpOperand &opOperand,
- const AnalysisState &state) const {
- return {};
- }
-
- bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
- const AnalysisState &state) const {
- assert(opOperand.getOperandNumber() == 0 && "expected src operand");
- return true;
- }
-
- bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
- const AnalysisState &state) const {
- // The memref operand is written but not the tensor operand.
- assert(opOperand.getOperandNumber() == 0 && "expected src operand");
- return false;
- }
-
- LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
- const BufferizationOptions &options) const {
- auto tensorStoreOp = cast<memref::TensorStoreOp>(op);
- auto srcBuffer = getBuffer(rewriter, tensorStoreOp.getTensor(), options);
- if (failed(srcBuffer))
- return failure();
- if (failed(options.createMemCpy(rewriter, op->getLoc(), *srcBuffer,
- tensorStoreOp.getMemref())))
- return failure();
- rewriter.eraseOp(tensorStoreOp);
- return success();
- }
-};
-
-} // namespace
-
-void mlir::memref::registerBufferizableOpInterfaceExternalModels(
- DialectRegistry &registry) {
- registry.addExtension(+[](MLIRContext *ctx, MemRefDialect *dialect) {
- TensorStoreOp::attachInterface<TensorStoreOpInterface>(*ctx);
- });
-}
diff --git a/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt b/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt
index b16c281c93640ea..08b7eab726eb7e8 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt
@@ -1,6 +1,5 @@
add_mlir_dialect_library(MLIRMemRefTransforms
AllocationOpInterfaceImpl.cpp
- BufferizableOpInterfaceImpl.cpp
ComposeSubView.cpp
ExpandOps.cpp
ExpandRealloc.cpp
diff --git a/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir b/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
index 73c5e28d1200e4c..49a52ba9e06f865 100644
--- a/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
@@ -15,7 +15,7 @@
// CHECK: linalg.fill ins(%[[c50]] : index) outs(%[[alloc]] : memref<?x?xindex>)
// CHECK: %[[dim0:.*]] = tensor.dim %[[t]], %[[c0]]
// CHECK: %[[subview:.*]] = memref.subview %[[alloc]][5, %[[l2]]] [%[[dim0]], 10] [1, 1]
-// CHECK: memref.tensor_store %[[t]], %[[subview]]
+// CHECK: bufferization.materialize_in_destination %[[t]] in writable %[[subview]]
// CHECK: %[[r:.*]] = bufferization.to_tensor %[[alloc]] restrict writable : memref<?x?xindex>
// CHECK: memref.dealloc %[[alloc]]
// CHECK: return %[[r]]
@@ -40,9 +40,9 @@ module attributes {transform.with_named_sequence} {
transform.test_print_number_of_associated_payload_ir_ops %fill_op : !transform.any_op
// Ensure that one linalg.copy was generated.
- %tensor_store = transform.select "memref.tensor_store" in %new : (!transform.any_op) -> !transform.any_op
+ %mat = transform.select "bufferization.materialize_in_destination" in %new : (!transform.any_op) -> !transform.any_op
// expected-remark @below{{1}}
- transform.test_print_number_of_associated_payload_ir_ops %tensor_store : !transform.any_op
+ transform.test_print_number_of_associated_payload_ir_ops %mat : !transform.any_op
transform.yield
}
}
@@ -50,7 +50,7 @@ module attributes {transform.with_named_sequence} {
// -----
// CHECK-LABEL: func @tensor_pad_constant_with_custom_copy(
-// CHECK-NOT: memref.tensor_store
+// CHECK-NOT: bufferization.materialize_in_destination
// CHECK-NOT: memref.copy
// CHECK: memref.alloca
// CHECK: linalg.copy
@@ -194,7 +194,7 @@ module attributes {transform.with_named_sequence} {
// CHECK-LABEL: func @vector_mask(
// CHECK-SAME: %[[t:.*]]: tensor<?xf32>,
// CHECK: %[[alloc:.*]] = memref.alloc(%{{.*}}) : memref<?xf32, 4>
-// CHECK: memref.tensor_store %[[t]], %[[alloc]]
+// CHECK: bufferization.materialize_in_destination %[[t]] in writable %[[alloc]]
// CHECK: vector.mask %{{.*}} { vector.transfer_write %{{.*}}, %[[alloc]]
// CHECK: %[[r:.*]] = bufferization.to_tensor %[[alloc]] rest...
[truncated]
|
`bufferization.materialize_in_destination` should be used instead. Both ops bufferize to a memcpy. This change also conceptually cleans up the memref dialect a bit: the memref dialect no longer contains ops that operate on tensor values. BEGIN_PUBLIC No public commit message needed for presubmit. END_PUBLIC
5a1c0d4
to
fea7c62
Compare
* replace memref.tensor_store with bufferization.materialize_in_destination as per llvm/llvm-project#71010 * update LKG Triton as well
bufferization.materialize_in_destination
should be used instead. Both ops bufferize to a memcpy. This change also conceptually cleans up the memref dialect a bit: the memref dialect no longer contains ops that operate on tensor values.